package com.startx.http.system.text;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
 * 敏感词过滤
 */
/**
 * Sensitive-word filter backed by a DFA (character trie).
 *
 * <p>The dictionary is loaded once, at class-initialization time, from the
 * classpath resource {@code /dfa/sensitiveword.dfa} (one word per line,
 * UTF-8). The trie is a nested {@code Map<Object, Object>}: each node maps
 * {@link Character} keys to child nodes and carries a {@code String} entry
 * {@code "isEnd"} ({@code "1"} marks a word end, {@code "0"} otherwise).
 *
 * <p>If the resource is missing or unreadable the filter degrades to an
 * empty dictionary (nothing matches) instead of throwing later.
 *
 * <p>All matching methods take a {@code distence} parameter: the maximum
 * number of non-matching "noise" characters tolerated between consecutive
 * characters of a sensitive word (e.g. distance 1 lets "f*o*o" match "foo").
 */
public class WordFilter {
	/**
	 * Root node of the sensitive-word trie. Never null: falls back to an
	 * empty map when the dictionary resource cannot be read.
	 */
	private static final Map<Object, Object> sensitiveWordMap = new SensitiveContext().initKeyWord();

	/**
	 * Replaces every character of each detected sensitive word with the
	 * given symbol. Noise characters skipped via {@code distence} are left
	 * untouched (only matched characters are masked).
	 *
	 * @param text     input text
	 * @param distence max tolerated gap between matched characters
	 * @param symbol   replacement character, e.g. {@code '*'}
	 * @return the masked text (equal to {@code text} when nothing matches)
	 */
	public static String replace(final String text, final int distence, final char symbol) {
		char[] charest = text.toCharArray();
		for (int i = 0; i < charest.length; i++) {
			// Replacing in-place means characters already masked cannot
			// start a new match on later iterations.
			FlagIndex fi = getFlagIndex(charest, i, distence);
			if (fi.isFlag()) {
				for (int j : fi.getIndex()) {
					charest[j] = symbol;
				}
			}
		}
		return new String(charest);
	}

	/**
	 * Tests whether the text contains at least one sensitive word.
	 *
	 * @param text     input text
	 * @param distence max tolerated gap between matched characters
	 * @return {@code true} if any sensitive word is found
	 */
	public static boolean include(final String text, final int distence) {
		char[] charest = text.toCharArray();
		for (int i = 0; i < charest.length; i++) {
			if (getFlagIndex(charest, i, distence).isFlag()) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Counts sensitive-word matches in the text. Each start offset is probed
	 * independently, so overlapping matches are each counted once.
	 *
	 * @param text     input text
	 * @param distence max tolerated gap between matched characters
	 * @return number of matches found
	 */
	public static int wordcount(final String text, final int distence) {
		int count = 0;
		char[] charest = text.toCharArray();
		for (int i = 0; i < charest.length; i++) {
			if (getFlagIndex(charest, i, distence).isFlag()) {
				count++;
			}
		}
		return count;
	}

	/**
	 * Collects the sensitive words found in the text, in order of their
	 * start offset. Each returned string is rebuilt from the matched
	 * characters only (noise characters within the gap are excluded).
	 *
	 * @param text     input text
	 * @param distence max tolerated gap between matched characters
	 * @return list of matched words; empty when nothing matches
	 */
	public static List<String> wordlist(final String text, final int distence) {
		List<String> sensitives = new ArrayList<>();
		char[] charest = text.toCharArray();
		for (int i = 0; i < charest.length; i++) {
			FlagIndex fi = getFlagIndex(charest, i, distence);
			if (fi.isFlag()) {
				// StringBuilder instead of StringBuffer: no shared mutation here.
				StringBuilder buffer = new StringBuilder();
				for (int j : fi.getIndex()) {
					buffer.append(text.charAt(j));
				}
				sensitives.add(buffer.toString());
			}
		}
		return sensitives;
	}

	/**
	 * Walks the trie starting at {@code begin} and reports whether a full
	 * sensitive word is matched, together with the indexes of its matched
	 * characters.
	 *
	 * @param charest  input text as a char array
	 * @param begin    start offset of the probe
	 * @param distence max number of consecutive non-matching characters
	 *                 tolerated before the probe is abandoned
	 * @return a {@link FlagIndex} whose flag is true on a complete match;
	 *         its index list then holds the matched character positions
	 */
	private static FlagIndex getFlagIndex(final char[] charest, final int begin, final int distence) {
		FlagIndex fi = new FlagIndex();

		Map<Object, Object> nowMap = sensitiveWordMap;
		boolean flag = false;
		int count = 0; // consecutive non-matching characters since last match
		List<Integer> index = new ArrayList<>();
		for (int i = begin; i < charest.length; i++) {
			char word = charest[i];
			@SuppressWarnings("unchecked")
			Map<Object, Object> mapTree = (Map<Object, Object>) nowMap.get(word);
			// Give up when the gap budget is exhausted, or immediately when
			// the very first character is not a trie root child.
			if (count > distence || (i == begin && Objects.isNull(mapTree))) {
				break;
			}
			if (!Objects.isNull(mapTree)) {
				nowMap = mapTree;
				count = 0; // gap budget resets on every matched character
				index.add(i);
			} else {
				count++;
			}
			if ("1".equals(nowMap.get("isEnd"))) {
				flag = true; // complete word matched; stop at first hit
				break;
			}
		}

		fi.setFlag(flag);
		fi.setIndex(index);

		return fi;
	}

	/**
	 * Loads the sensitive-word dictionary and builds the DFA trie, e.g. for
	 * the words "ab" and "abc":
	 * {@code a = { isEnd=0, b = { isEnd=1, c = { isEnd=1 } } }}.
	 */
	private static class SensitiveContext {
		/**
		 * Builds the trie from the dictionary resource.
		 *
		 * @return the trie root; an empty map (matching nothing) when the
		 *         dictionary cannot be read, never {@code null}
		 */
		public Map<Object, Object> initKeyWord() {
			Map<Object, Object> root = new HashMap<>();
			try {
				Set<String> keyWordSet = readSensitiveWordFile();
				addSensitiveWordToHashMap(root, keyWordSet);
			} catch (Exception e) {
				// Best-effort: log and fall back to an empty dictionary so
				// the static field is never null and lookups never NPE.
				e.printStackTrace();
			}
			return root;
		}

		/**
		 * Inserts every word of the set into the trie rooted at {@code root}.
		 * Each character becomes a nested map node; the terminal node of a
		 * word is flagged with {@code isEnd = "1"}.
		 *
		 * @param root       trie root to populate
		 * @param keyWordSet dictionary words
		 */
		private void addSensitiveWordToHashMap(Map<Object, Object> root, Set<String> keyWordSet) {
			for (String key : keyWordSet) {
				Map<Object, Object> nowMap = root;
				for (int i = 0; i < key.length(); i++) {
					char keyChar = key.charAt(i);
					Object wordMap = nowMap.get(keyChar);
					if (wordMap != null) {
						// Node already exists (shared prefix): descend.
						@SuppressWarnings("unchecked")
						Map<Object, Object> child = (Map<Object, Object>) wordMap;
						nowMap = child;
					} else {
						// New node: not a word end yet.
						Map<Object, Object> newWordMap = new HashMap<>();
						newWordMap.put("isEnd", "0");
						nowMap.put(keyChar, newWordMap);
						nowMap = newWordMap;
					}
					if (i == key.length() - 1) {
						nowMap.put("isEnd", "1"); // terminal character of the word
					}
				}
			}
		}

		/**
		 * Reads the dictionary resource, one word per line, into a set.
		 *
		 * @return the (possibly empty) word set; empty when the resource is
		 *         absent from the classpath
		 * @throws Exception if the resource exists but cannot be read
		 */
		private Set<String> readSensitiveWordFile() throws Exception {
			Set<String> set = new HashSet<>();
			InputStream in = this.getClass().getResourceAsStream("/dfa/sensitiveword.dfa");
			if (in == null) {
				// Resource missing: previously this NPE'd inside
				// InputStreamReader; now it simply yields an empty dictionary.
				return set;
			}
			try (BufferedReader bufferedReader = new BufferedReader(
					new InputStreamReader(in, StandardCharsets.UTF_8))) {
				String txt;
				while ((txt = bufferedReader.readLine()) != null) {
					set.add(txt);
				}
			}
			return set;
		}
	}

	/**
	 * Result of a single trie probe: whether a word matched, and at which
	 * character positions.
	 */
	private static class FlagIndex {
		/**
		 * True when a complete sensitive word was matched.
		 */
		private boolean flag;
		/**
		 * Positions (in the probed char array) of the matched characters.
		 */
		private List<Integer> index;

		public boolean isFlag() {
			return flag;
		}

		public void setFlag(boolean flag) {
			this.flag = flag;
		}

		public List<Integer> getIndex() {
			return index;
		}

		public void setIndex(List<Integer> index) {
			this.index = index;
		}

	}
}
